
# Figure_4.R
# Aim: to reproduce Figure 4 of Atsusaka and Holbrook (2026)

# Load packages
library(tidyverse)

# Panel (a) check "round and cand" =============================================

# Contest-level table; the plot below reads its `n_cands` and `n_rounds`
# columns.
dt_contest <- read_csv("archive-election.csv")

# Scatter of the number of rounds against (number of candidates + 1), with a
# dashed 45-degree reference line (intercept 0, slope 1).
plot_round <- ggplot(dt_contest, aes(x = n_cands + 1, y = n_rounds)) +
  geom_point(color = "darkcyan", alpha = 0.5) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  theme_bw() +
  labs(x = "# candidates + 1 (write-in)", y = "# rounds")


# Panel (b) check monotonicity =================================================

# read data --------------------------------------------------------------------

# Candidate-level table; later code uses its `election_id`, `name`, `round`
# and `votes` columns.
raw <- read_csv("archive-candidate.csv")

# Distinct election identifiers, in order of first appearance.
id <- unique(raw[["election_id"]])

# Function for targeted reporting ----------------------------------------------
# Reshape one election's long (name, round, votes) table into a wide table
# with one row per candidate and one `votes_r<k>` column per round.
#
# `data` must contain the columns `name`, `round` and `votes`.
# Rows follow the ordering of the lowest round (votes descending, then name);
# later rounds are attached with left joins on `name`, so a candidate missing
# from a later round gets NA there, and a name absent from the lowest round
# is not carried into the result.
combine_rounds <- function(data) {
  round_ids <- sort(unique(data$round))

  # Votes of a single round `r`, with the votes column renamed to votes_r<r>.
  votes_for_round <- function(r) {
    one_round <- filter(data, round == r)
    one_round <- select(one_round, -round)
    one_round <- arrange(one_round, desc(votes), name)
    rename(one_round, !!paste0("votes_r", r) := votes)
  }

  per_round <- map(round_ids, votes_for_round)

  # Fold the per-round tables together, keyed by candidate name.
  reduce(per_round, left_join, by = "name")
}

# Apply to all elections: ------------------------------------------------------
# Build the wide candidate-by-round table for every election, stack the
# per-election tables, and sort by election and round-1 votes (descending).
results_df <- map_dfr(id, function(eid) {
  # One row per (name, round) for this election; duplicates are dropped,
  # keeping the first occurrence.
  one_election <- raw %>%
    filter(election_id == eid) %>%
    distinct(name, round, .keep_all = TRUE) %>%
    select(name, round, votes)

  wide <- combine_rounds(one_election)

  # Tag the rows with their election and move the identifiers to the front.
  wide %>%
    mutate(election_id = eid) %>%
    select(election_id, name, everything())
}) %>%
  arrange(election_id, desc(votes_r1))


# Monotonicity check between consecutive rounds (R1 v. R2, ..., R9 v. R10).
#
# For each round k, take the candidates whose round-k vote count is neither
# missing nor zero and flag whether their count did not decrease from round
# k - 1 to round k. The share of flagged candidates is plotted per round.

# Flag monotonicity for round `k` against round `k - 1`.
#
# data: wide table with `election_id`, `name` and `votes_r<k>` columns.
# k:    round number (>= 2).
# Returns the filtered rows with the two vote columns and `mono`
# (1 if votes_r<k-1> <= votes_r<k>, 0 otherwise, NA if the earlier count is
# missing).
check_monotonicity <- function(data, k) {
  prev_col <- paste0("votes_r", k - 1)
  curr_col <- paste0("votes_r", k)

  data %>%
    filter(!is.na(.data[[curr_col]]), .data[[curr_col]] != 0) %>%
    dplyr::select(election_id, name, all_of(c(prev_col, curr_col))) %>%
    mutate(mono = ifelse(.data[[prev_col]] <= .data[[curr_col]], 1, 0))
}

# Rounds shown in the panel.
mono_rounds <- 2:10

# Fail early, with a readable message, if a required round column is absent.
needed_cols <- paste0("votes_r", c(min(mono_rounds) - 1, mono_rounds))
missing_cols <- setdiff(needed_cols, names(results_df))
if (length(missing_cols) > 0) {
  stop(
    "results_df is missing round columns: ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

mono_checks <- map(mono_rounds, function(k) check_monotonicity(results_df, k))
names(mono_checks) <- paste0("r", mono_rounds - 1, "_vs_r", mono_rounds)

# Candidates whose vote count decreased, listed for every pair of rounds.
mono_violations <- map(mono_checks, function(chk) filter(chk, mono == 0))
mono_violations

# Proportion of monotone candidates per round (unnamed so the data frame
# keeps default row names).
df <- data.frame(
  prop_mono = unname(map_dbl(mono_checks, function(chk) mean(chk$mono))),
  round = mono_rounds
)

plot_mono <- ggplot(df, aes(x = factor(round), y = prop_mono)) +
  geom_col(fill = "darkcyan", alpha = 0.5) +
  geom_hline(yintercept = 1, linetype = "dashed") +
  labs(x = "Round", y = "Prop. Monotonicity") +
  theme_minimal()

plot_mono



# Panel (c) check winners ======================================================
# Re-read the candidate-level table so this panel can be run on its own.
raw <- read_csv("archive-candidate.csv")

# Number of rows per election whose status is "Elected" or "Winner".
# NOTE(review): this sums over all rows of an election; if the file holds
# several rows per candidate (e.g. one per round), a winner flagged on more
# than one row is counted more than once. Confirm against the data.
sanity <- raw %>%
  mutate(winner = ifelse(status %in% c("Elected", "Winner"), 1, 0)) %>%
  group_by(election_id) %>%
  summarize(n_winner = sum(winner)) %>%
  ungroup()

# Frequency of each winner count across elections.
tab_winner <- table(sanity$n_winner)
tab_winner

# Turn the frequency table into a data frame and add each count's share.
df_winner <- setNames(as.data.frame(tab_winner), c("n_winner", "count"))
df_winner$prop <- df_winner$count / sum(df_winner$count)

# Bar chart of the shares, with a dashed reference line at 1.
plot_winner <- ggplot(df_winner, aes(x = as.factor(n_winner), y = prop)) +
  geom_col(fill = "darkcyan", alpha = 0.5) +
  geom_hline(yintercept = 1, linetype = "dashed") +
  labs(x = "# winners per election", y = "Proportion") +
  theme_minimal()


# Panel (d) check "transfer" ===================================================
# Input tables for the transfer check.
# NOTE(review): `dt_round` is loaded but not used in the code visible here.
dt_cand <- read_csv("archive-candidate.csv")
dt_round <- read_csv("archive-round.csv")
dt_contest <- read_csv("archive-election.csv")

# Candidate rows joined with contest-level information, restricted to
# elections with more than one round; `transfer` is `transfer_calc` coerced
# to numeric.
out <- left_join(dt_cand, dt_contest, by = "election_id") %>%
  mutate(transfer = as.numeric(transfer_calc)) %>%
  filter(n_rounds > 1) %>%
  dplyr::select(election_id, n_rounds, round, name, votes, transfer)

# Sum of transfers within each election, ignoring missing values.
out_check <- out %>%
  group_by(election_id) %>%
  summarize(sum_transfer = sum(transfer, na.rm = TRUE)) %>%
  ungroup()

# Histogram of the per-election sums (in thousands), with a dashed line at 0.
# An x-axis zoom, coord_cartesian(xlim = c(-5000, 10000)), was previously
# tried and is left disabled.
plot_transfer <- ggplot(out_check, aes(x = sum_transfer / 1000)) +
  geom_histogram(fill = "darkcyan", color = "darkcyan", alpha = 0.5) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  labs(x = "# transferred votes (unit: 1000)", y = "Count") +
  theme_minimal()



# Combine all panels ===========================================================

# Arrange the four panels in a 2 x 2 grid labelled a-d.
figure_4 <- ggpubr::ggarrange(
  plot_round, plot_mono,
  plot_winner, plot_transfer,
  nrow = 2,
  ncol = 2,
  labels = c("a", "b", "c", "d")
)

# Display the combined figure.
figure_4

# Pass the figure explicitly: without `plot =`, ggsave() writes whatever
# ggplot2 last recorded via last_plot(), which depends on how the script is
# run rather than on the object built above.
ggsave("Figure_4.pdf", plot = figure_4, width = 6, height = 6)
